Initial visualization: TikTok Posts vs. Spotify Track Score¶

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the pre-filtered workbook from its local path.
filtered_data = pd.read_excel("C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx")

# Rank every row by Track Score (highest first) and keep the first 100.
ranked_by_score = filtered_data.sort_values(by='Track Score', ascending=False)
top_100_data = ranked_by_score.head(100)

# Scatter of Track Score (x) against TikTok Posts (y), drawn through the
# explicit Figure/Axes interface rather than the pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(top_100_data['Track Score'], top_100_data['TikTok Posts'], alpha=0.7)
ax.set_title('Top 100 Songs: TikTok Posts vs. Spotify Track Score')
ax.set_xlabel('Spotify Track Score')
ax.set_ylabel('TikTok Posts')
ax.grid(True)

# Render the figure.
plt.show()

Rework: show TikTok Posts in millions instead of raw counts¶

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the pre-filtered workbook from its local path.
filtered_data = pd.read_excel("C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx")

# Take the 100 highest Track Scores and add a rescaled TikTok column.
# The division by 1,000,000 expresses the value in millions
# (assumes 'TikTok Posts' holds raw post counts -- TODO confirm against the data).
top_100_data = (
    filtered_data
    .sort_values(by='Track Score', ascending=False)
    .head(100)
    .assign(**{'TikTok Posts (in Millions)': lambda df: df['TikTok Posts'] / 1_000_000})
)

# Seaborn scatter drawn onto an explicitly created Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x='Track Score', y='TikTok Posts (in Millions)', data=top_100_data, ax=ax)
ax.set_title('Top 100 Songs: TikTok Posts (in Millions) vs. Spotify Track Score')
ax.set_xlabel('Spotify Track Score')
ax.set_ylabel('TikTok Posts (in Millions)')
ax.grid(True)

# Render the figure.
plt.show()

Update: open the plot in a browser, enlarge the dots, and add a regression line¶

In [3]:
import pandas as pd
import plotly.express as px

import webbrowser

# Load the filtered data from the local Excel export
filtered_data = pd.read_excel("C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx")

# Sort the data by Track Score and select the top 100 songs
top_100_data = filtered_data.sort_values(by='Track Score', ascending=False).head(100)

# Divide TikTok Posts by 1,000,000 so the axis reads in millions
# (assumes the column holds raw post counts -- TODO confirm against the data)
top_100_data['TikTok Posts (in Millions)'] = top_100_data['TikTok Posts'] / 1_000_000

# Create an interactive scatter plot with a regression line using Plotly
fig = px.scatter(
    top_100_data,
    x='Track Score',
    y='TikTok Posts (in Millions)',
    hover_data=['Track', 'Artist'],
    trendline="ols",  # Add an ordinary-least-squares regression line
    title='Top 100 Songs: TikTok Posts (in Millions) vs. Spotify Track Score',
    labels={
        'Track Score': 'Spotify Track Score',
        'TikTok Posts (in Millions)': 'TikTok Posts (in Millions)'
    },
    template='plotly',  # Use a template for consistent styling
)

# Style each trace type explicitly instead of pushing both settings onto
# every trace: bigger dots on the marker trace, dashes on the trendline.
fig.update_traces(marker=dict(size=12), selector=dict(mode='markers'))
fig.update_traces(line=dict(dash="dash"), selector=dict(mode='lines'))

# Single definition of the output location, used for both writing and opening
plot_path = "C:\\Users\\quent\\Desktop\\interactive_Spotify_TikTok_plot.html"

# Save the plot as an HTML file
fig.write_html(plot_path)

# Optional: open the HTML file in a web browser automatically
webbrowser.open(plot_path)
Out[3]:
True

Final Visualizations: Top 10 Artists by TikTok Posts and Top 10 Artists by Spotify Playlist Count¶

In [4]:
import plotly.express as px
import pandas as pd

# Load the dataset
file_path = "C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx"
data = pd.read_excel(file_path)


def _top_artists_bar(frame, metric, n=10):
    """Total `metric` per artist and chart the `n` largest totals.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with an 'Artist' column and a `metric` column that can be summed.
    metric : str
        Name of the column to total per artist; also used as the y-axis
        label and in the chart title.
    n : int, default 10
        Number of artists to keep.

    Returns
    -------
    tuple
        ``(totals, chart)``: the per-artist totals as a Series sorted in
        descending order, and the Plotly bar figure built from them.
    """
    totals = (
        frame.groupby('Artist')[metric]
        .sum()
        .sort_values(ascending=False)
        .head(n)
    )
    chart = px.bar(
        totals.reset_index(),
        x='Artist',
        y=metric,
        title=f'Top {n} Artists by {metric}',
        labels={metric: metric},
    )
    return totals, chart


# Top 10 artists by summed TikTok Posts
top_artists_by_tiktok, fig_tiktok = _top_artists_bar(data, 'TikTok Posts')
fig_tiktok.show()

# Top 10 artists by summed Spotify Playlist Count
top_artists_by_spotify, fig_spotify = _top_artists_bar(data, 'Spotify Playlist Count')
fig_spotify.show()

# Export both charts as standalone HTML files
fig_tiktok.write_html('C:\\Users\\quent\\Downloads\\top_10_artists_by_tiktok.html')
fig_spotify.write_html('C:\\Users\\quent\\Downloads\\top_10_artists_by_spotify.html')
In [5]:
# Swarm plot of every Track Score in `data`, drawn on an explicit Axes.
# NOTE(review): the recorded output of this cell shows seaborn warning that
# a large share of the points cannot be placed at this marker size; the
# warning itself suggests smaller markers or a stripplot.
fig, ax = plt.subplots(figsize=(10, 6))
sns.swarmplot(x=data['Track Score'], size=3, ax=ax)
ax.set_title('Swarm Plot of Track Scores')
ax.set_xlabel('Track Score')

plt.show()
C:\Users\quent\anaconda3\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

6.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

C:\Users\quent\anaconda3\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

65.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

Using Jupyter Widgets (ipywidgets)¶

In [6]:
import ipywidgets as widgets
from IPython.display import display

# Selector for the kind of Track Score plot to draw; the first entry is
# the initial selection.
plot_choices = ['Histogram', 'Box Plot', 'Violin Plot']
dropdown = widgets.Dropdown(
    description='Plot Type:',
    options=plot_choices,
    value=plot_choices[0],
)

# Draw the Track Score distribution in the style named by `plot_type`
def update_plot(plot_type):
    """Plot `data['Track Score']` as the requested chart and show it.

    `plot_type` is one of 'Histogram', 'Box Plot' or 'Violin Plot'. Any
    other value draws nothing, but `plt.show()` is still called.
    """
    renderers = {
        'Histogram': lambda: sns.histplot(data['Track Score'], bins=20),
        'Box Plot': lambda: sns.boxplot(x=data['Track Score']),
        'Violin Plot': lambda: sns.violinplot(x=data['Track Score']),
    }
    render = renderers.get(plot_type)
    if render is not None:
        render()
    plt.show()

# Bind the dropdown to update_plot through interactive_output. It calls
# update_plot once for the current value and again on every change, and it
# captures the figure in its own output area, clearing the previous plot
# first. Plotting from a bare `observe` callback has no managed output
# area, so figures pile up or are routed to the log console.
plot_area = widgets.interactive_output(update_plot, {'plot_type': dropdown})
display(dropdown, plot_area)
Dropdown(description='Plot Type:', options=('Histogram', 'Box Plot', 'Violin Plot'), value='Histogram')

2 Using Plotly and Plotly Dash in Jupyter¶

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd

# Read the filtered workbook that every chart in the app draws from
file_path = "C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx"
data = pd.read_excel(file_path)

# Create the Dash application object
app = JupyterDash(__name__)

# Choices offered by the plot-type selector (label shown -> value sent to the callback)
plot_options = [
    {'label': 'Histogram of Track Scores', 'value': 'hist'},
    {'label': 'Box Plot of Track Scores', 'value': 'box'},
    {'label': 'Violin Plot of Track Scores', 'value': 'violin'}
]

# Page layout: the selector on top, the graph it drives underneath
plot_selector = dcc.Dropdown(id='plot-type', options=plot_options, value='hist')
app.layout = html.Div([plot_selector, dcc.Graph(id='graph')])

# Define the callback to update the graph
@app.callback(
    Output('graph', 'figure'),
    [Input('plot-type', 'value')]
)
def update_figure(plot_type):
    """Build the Track Score figure for the selected plot type.

    Parameters
    ----------
    plot_type : str or None
        Value of the 'plot-type' dropdown: 'hist', 'box' or 'violin'.
        The dropdown sends None when its selection is cleared.

    Returns
    -------
    plotly figure
        Histogram, box plot or violin plot of `data['Track Score']`.

    Raises
    ------
    dash.exceptions.PreventUpdate
        For None or an unrecognised value, so the graph keeps its current
        figure instead of the callback failing on an unassigned `fig`.
    """
    from dash.exceptions import PreventUpdate

    builders = {
        'hist': px.histogram,
        'box': px.box,
        'violin': px.violin,
    }
    build = builders.get(plot_type)
    if build is None:
        raise PreventUpdate
    return build(data, x='Track Score')

# Run the app in an external browser
# NOTE(review): the recorded output of this cell shows the warning
# "JupyterDash is deprecated, use Dash instead" and points to
# https://dash.plotly.com/dash-in-jupyter for details.
app.run_server(mode='external')
Dash app running on http://127.0.0.1:8050/
C:\Users\quent\anaconda3\Lib\site-packages\dash\dash.py:556: UserWarning:

JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.

In [ ]: